#using read.table corals<-read.table('ecol 562/corals.csv',header=T,sep=',') #using read.csv corals<-read.csv('ecol 562/corals.csv',header=T) names(corals) #plot response versus coral cover plot(PREV_1~CORAL_COVE,data=corals,ylim=c(0,50)) #add regression smoother lines(lowess(corals$PREV_1~corals$CORAL_COVE),col=2) #plot response versus stress metric plot(PREV_1~WSSTA,data=corals,ylim=c(0,50)) lines(lowess(corals$PREV_1~corals$WSSTA),col=2) ### normal model #### lm(PREV_1~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals) -> model1 anova(model1) summary(model1) # each observation has a unique value of coral cover length(unique(corals$CORAL_COVE)) dim(corals) #explanation of the order function x<-sample(1:10) x order(x) x[order(x)] #sort data in coral cover order corals.sort<-corals[order(corals$CORAL_COVE),] #refit model to sorted data lm(PREV_1~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals.sort) -> model1 #plot raw data and normal mean plot(PREV_1~CORAL_COVE,data=corals.sort,ylim=c(-25,350),cex=.6) lines(corals.sort$CORAL_COVE,predict(model1),col=2) #add 95% confidence interval for mean plot(PREV_1~CORAL_COVE,data=corals.sort,ylim=c(-100,350),cex=.6) lines(corals.sort$CORAL_COVE,predict(model1),col=2) lines(corals.sort$CORAL_COVE,predict(model1)-1.96*summary(model1)$sigma,col=4,lty=2) lines(corals.sort$CORAL_COVE,predict(model1)+1.96*summary(model1)$sigma,col=4,lty=2) #simulate data from normal model set.seed(10) test.data <- sapply(predict(model1),function(x) rnorm(1,x,summary(model1)$sigma)) #display two graphs in same window par(mfrow=c(2,1)) plot(PREV_1~CORAL_COVE,data=corals.sort,ylim=c(-100,350),cex=.6) plot(test.data~corals.sort$CORAL_COVE,cex=.6,ylim=c(-100,350)) abline(h=0,col=2,lty=2) ##### Poisson model ##### glm(PREV_1~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals.sort,family=poisson) -> model2 #simulate data from Poisson model set.seed(12) test.data2 <- sapply(exp(predict(model2)),function(x) rpois(1,x)) plot(PREV_1~CORAL_COVE,data=corals.sort,ylim=c(-100,350),cex=.6) plot(test.data2~corals.sort$CORAL_COVE,cex=.6,ylim=c(-100,350)) anova(model2,test='Chisq') summary(model2) #compare log-likelihood and AIC of Poisson and normal model logLik(model1) logLik(model2) AIC(model1) AIC(model1,model2) #log-likelihood function for normal model using midpoint approximation norm.loglike1 <- function(model,y) { sigma2 <- (sum(residuals(model)^2))/length(y) prob <- dnorm(y,mean=predict(model),sd=sqrt(sigma2)) loglike <- sum(log(prob)) loglike } norm.loglike1(model1,corals.sort$PREV_1) #log-likelihood function for normal model using exact probability calculation norm.loglike2 <- function(model,y) { sigma2 <- (sum(residuals(model)^2))/length(y) prob <- pnorm(y+.5,mean=predict(model),sd=sqrt(sigma2))-pnorm(y-.5,mean=predict(model),sd=sqrt(sigma2)) loglike <- sum(log(prob)) loglike } norm.loglike2(model1,corals.sort$PREV_1) ###### Negative binomial model ####### library(MASS) glm.nb(PREV_1~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals.sort)->model3 anova(model3) summary(model3) #compare coefficients of three models fit so far coef(model1) coef(model2) coef(model3) #compare AIC of models AIC(model1,model2,model3) #dispersion parameter is called theta names(model3) #simulate data from negative binomial model set.seed(12) test.data3<-sapply(fitted(model3),function(x) rnbinom(1,mu=x,size=model3$theta)) #graph raw data, Poisson data, and NB data par(mfrow=c(3,1)) plot(PREV_1~CORAL_COVE,data=corals.sort,ylim=c(0,350),cex=.6) plot(test.data2~corals.sort$CORAL_COVE,cex=.6,ylim=c(0,350)) plot(test.data3~corals.sort$CORAL_COVE,cex=.6,ylim=c(0,350)) ##### log-normal model ####### #because of zeros log is undefined lm(log(PREV_1)~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals.sort)->model4 #add a small constant lm(log(PREV_1+.5)~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals.sort)->model4 #AIC of this model is not comparable to previous models because reponse is transformed AIC(model4) #function to determine optimal constant to use norm.loglike3 <- function(k,y) { lm(log(PREV_1+k)~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals.sort)->model sigma2 <- (sum(residuals(model)^2))/length(y) prob <- ifelse(y==0, pnorm(log(y+k+.5),mean=predict(model),sd=sqrt(sigma2)), pnorm(log(y+k+.5),mean=predict(model),sd=sqrt(sigma2)) -pnorm(log(y+k-.5),mean=predict(model),sd=sqrt(sigma2))) loglike <- sum(log(prob)) loglike } #test the function norm.loglike3(.5,corals.sort$PREV_1) norm.loglike3(.7,corals.sort$PREV_1) norm.loglike3(.4,corals.sort$PREV_1) #evaluate function on a range of values sapply(seq(.1,1,.01),function(x) norm.loglike3(x,corals.sort$PREV_1))->out.L #plot log-liklihood par(mfrow=c(1,1)) plot(seq(.1,1,.01),out.L,type='l') #try a different range of values sapply(seq(.01,.4,.01),function(x) norm.loglike3(x,corals.sort$PREV_1))->out.L plot(seq(.01,.4,.01),out.L,type='l') #locate maximum max(out.L) #which value gave rise to the max? which.max(out.L) seq(.01,4,.01)[which.max(out.L)] #refit the model using this constant lm(log(PREV_1+.13)~WSSTA*CORAL_COVE+I(WSSTA^2),data=corals.sort)->model4 anova(model4) #compare coefficients of all the models we fit t(sapply(list(model1,model2,model3,model4),coef)) #compare log-liklihood of this model with NB model norm.loglike3(.13,corals.sort$PREV_1) logLik(model3) #compare AIC AIC(model3) -2*norm.loglike3(.13,corals.sort$PREV_1)+2*6 #count constant as an estimated parameter -2*norm.loglike3(.13,corals.sort$PREV_1)+2*7 #graph the predictions of each model # use 30% as the value for coral cover--the mean linfunc<-function(model,x,cover) coef(model)[1] +coef(model)[2]*x +coef(model)[3]*cover+ coef(model)[4]*x^2+coef(model)[5]*x*cover curve(linfunc(model1,x,30),from=0,to=30,ylim=c(0,25)) curve(exp(linfunc(model4,x,30)),add=T,lty=2) curve(exp(linfunc(model2,x,30)),add=T,col=2) curve(exp(linfunc(model3,x,30)),add=T,col=2,lty=2) legend('topright', c('normal','lognormal','Poisson','negative binomial'), col=c(1,1,2,2), lty=c(1,2,1,2), cex=.9, bty='n')